library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
setwd("~/Desktop/publpol118x/a1")
library(tidyverse)

year <- 2020
quarters <- 1:4
type <- "Electric"

pge_20_elec <- NULL

for(quarter in quarters) {
  
  filename <- 
    paste0(
      "PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )

  print(filename)
  
  temp <- read_csv(filename)
  
  pge_20_elec <- rbind(pge_20_elec,temp)

  saveRDS(pge_20_elec, "pge_20_elec.rds")
}
## [1] "PGE_2020_Q1_ElectricUsageByZip.csv"
## Rows: 7865 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2020_Q2_ElectricUsageByZip.csv"
## Rows: 7791 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2020_Q3_ElectricUsageByZip.csv"
## Rows: 7784 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2020_Q4_ElectricUsageByZip.csv"
## Rows: 7771 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(tidyverse)

year <- 2017
quarters <- 1:4
type <- "Electric"

pge_17_elec <- NULL

for(quarter in quarters) {
  
  filename <- 
    paste0(
      "PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )

  print(filename)
  
  temp <- read_csv(filename)
  
  pge_17_elec <- rbind(pge_17_elec,temp)

  saveRDS(pge_17_elec, "pge_17_elec.rds")
}
## [1] "PGE_2017_Q1_ElectricUsageByZip.csv"
## Rows: 7776 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2017_Q2_ElectricUsageByZip.csv"
## Rows: 7819 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2017_Q3_ElectricUsageByZip.csv"
## Rows: 7842 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2017_Q4_ElectricUsageByZip.csv"
## Rows: 10455 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(tidyverse)

year <- 2018
quarters <- 1:4
type <- "Electric"

pge_18_elec <- NULL

for(quarter in quarters) {
  
  filename <- 
    paste0(
      "PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )

  print(filename)
  
  temp <- read_csv(filename)
  
  pge_18_elec <- rbind(pge_18_elec,temp)

  saveRDS(pge_18_elec, "pge_18_elec.rds")
}
## [1] "PGE_2018_Q1_ElectricUsageByZip.csv"
## Rows: 7852 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2018_Q2_ElectricUsageByZip.csv"
## Rows: 7880 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2018_Q3_ElectricUsageByZip.csv"
## Rows: 7877 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2018_Q4_ElectricUsageByZip.csv"
## Rows: 7875 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(tidyverse)

year <- 2019
quarters <- 1:4
type <- "Electric"

pge_19_elec <- NULL

for(quarter in quarters) {
  
  filename <- 
    paste0(
      "PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )

  print(filename)
  
  temp <- read_csv(filename)
  
  pge_19_elec <- rbind(pge_19_elec,temp)

  saveRDS(pge_19_elec, "pge_19_elec.rds")
}
## [1] "PGE_2019_Q1_ElectricUsageByZip.csv"
## Rows: 7870 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2019_Q2_ElectricUsageByZip.csv"
## Rows: 7887 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2019_Q3_ElectricUsageByZip.csv"
## Rows: 7891 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2019_Q4_ElectricUsageByZip.csv"
## Rows: 7874 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(tidyverse)

year <- 2021
quarters <- 1:2
type <- "Electric"

pge_21_elec <- NULL

for(quarter in quarters) {
  
  filename <- 
    paste0(
      "PGE_",
      year,
      "_Q",
      quarter,
      "_",
      type,
      "UsageByZip.csv"
    )

  print(filename)
  
  temp <- read_csv(filename)
  
  pge_21_elec <- rbind(pge_21_elec,temp)

  saveRDS(pge_21_elec, "pge_21_elec.rds")
}
## [1] "PGE_2021_Q1_ElectricUsageByZip.csv"
## Rows: 7580 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] "PGE_2021_Q2_ElectricUsageByZip.csv"
## Rows: 7611 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): CUSTOMERCLASS, COMBINED
## dbl (3): ZIPCODE, MONTH, YEAR
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pge_elec_17_21 <- rbind(pge_17_elec, pge_18_elec, pge_19_elec, pge_20_elec, pge_21_elec)
pge_filter <- 
  filter(
    pge_elec_17_21,
    CUSTOMERCLASS %in% 
      c(
        "Elec- Residential",
        "Elec- Commercial"
      )
  )
table(pge_filter$COMBINED)
## 
##     N     Y 
## 48836 43915
pge_select <-
  select(
    pge_filter,
    -c(COMBINED, AVERAGEKWH)
  )
pge_group <-
  group_by(
    pge_select,
    MONTH,
    CUSTOMERCLASS
  )
pge_summarize <-
  summarize(
    pge_group,
    TOTALKWH = 
      sum(
        TOTALKWH, 
        na.rm = T
      ),
    TOTALCUSTOMERS =
      sum(
        TOTALCUSTOMERS,
        na.rm = T
      ),
    TOTALKBTU =
      TOTALKWH*3.412
  )
## `summarise()` has grouped output by 'MONTH'. You can override using the `.groups` argument.
pge_mutate <-
  mutate(
    pge_summarize,
    AVERAGEKBTU =
      TOTALKBTU/TOTALCUSTOMERS
  )
pge_mutate_res <-
  filter(
    pge_mutate,
    CUSTOMERCLASS %in% 
      c(
        "Elec- Residential"
  )
  )
pge_mutate_com <-
  filter(
    pge_mutate,
    CUSTOMERCLASS %in% 
      c(
        "Elec- Commercial"
  )
  )
library(tidyverse)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
pge_chart_res <-
  pge_mutate_res %>% 
  ggplot() +
  geom_bar(
    aes(
      x = MONTH %>% factor(),
      y = TOTALKBTU,
      fill = CUSTOMERCLASS
    ),
    stat = "identity",
    position = "stack"
  ) +
  labs(
    x = "Month",
    y = "kBTU",
    title = "PG&E Territory Monthly Electricity Usage (Residential)",
    fill = "Electricity Type"
  )

pge_chart_res %>% ggplotly()

```